# LOAD NECESSARY PACKAGES ------------------------------------------------------

library(pacman)

# tidyverse
p_load(dplyr, tidyr, stringr, forcats, purrr, ggplot2, patchwork, broom, readr, 
       tidylog)

# analysis
p_load(naniar, did, modelsummary, estimatr)

# misc
p_load(here, kableExtra)

# NIGERIA ----------------------------------------------------------------------

# Define consistent outcomes variables and labels

hh_nfeoutcomes <- c("nfe_tot", "nfe_retail", "nfe_manuf",
                    "nfe_wmntot", "nfe_wmnretail", "nfe_wmnmanuf")

hh_nfeoutcomes_labels <- c("NFE All", "NFE Retail", "NFE Manuf.",
                           "NFE All (Wmn)", "NFE Retail (Wmn)", "NFE Manuf. (Wmn)")

indiv_empoutcomes <- c("emp", "emp_nonfarm", "emp_farm")

indiv_empoutcomes_labels <- c("Emp. All", "Emp. Non-farm", "Emp. Farm")

# Define balancing covariates and labels

vill_covar <- c("dist_road", "dist_mkt", "dist_admctr")

vill_covar_labels <- c("Distance to road", "Distance to market", 
                       "Distance to admin. centre")

hh_covar <- c("hh_size", "hh_avgage", "hh_avgmale", "hh_headmarr", "hh_educ")

hh_covar_labels <- c("HH size", "HH avg. adult age", "HH avg. adult males", 
                     "Head married", "HH highest educ. years")

indiv_covar <- c("hh_size", "educ", "age", "sexfem", "mar")

indiv_covar_labels <- c("HH size", "Education years", "Age", "Female", "Married")

# NIGERIA - HOUSEHOLD TREATMENT ------------------------------------------------

# Load data
nga_hh_panel <- readRDS(here::here("Data", "nigeria_hhpanel.rds"))
nga_indiv_panel <- readRDS(here::here("Data", "nigeria_indivpanel.rds"))

# Prepare analysis dataset -----------------------------------------------------

# Define treatment as those without grid in wave 1 but grid in waves 2 and 3

a <- nga_hh_panel %>% 
  filter(year == 2010, el_grid == 0) %>% 
  pull(hhid)

b <- nga_hh_panel %>% 
  filter(year == 2012, el_grid == 1) %>% 
  pull(hhid)

c <- nga_hh_panel %>% 
  filter(year == 2015, el_grid == 1) %>% 
  pull(hhid)

treated2 <- intersect(intersect(a,b),c)
treated3 <- setdiff(intersect(a,c),intersect(a,b))
rm(a,b,c)

a <- nga_hh_panel %>% 
  filter(year == 2010, el_grid == 0) %>% 
  pull(hhid)

b <- nga_hh_panel %>% 
  filter(year == 2012, el_grid == 0) %>% 
  pull(hhid)

c <- nga_hh_panel %>% 
  filter(year == 2015, el_grid == 0) %>% 
  pull(hhid)

nontreated <- intersect(intersect(a,b),c)
rm(a,b,c)

a <- nga_hh_panel %>% 
  filter(year == 2010, el_grid == 1) %>% 
  pull(hhid)

b <- nga_hh_panel %>% 
  filter(year == 2012, el_grid == 1) %>% 
  pull(hhid)

c <- nga_hh_panel %>% 
  filter(year == 2015, el_grid == 1) %>% 
  pull(hhid)

treated1 <- intersect(intersect(a,b),c)
rm(a,b,c)

# Set treatment group as uniform survey wave periods and change survey wave
# years accordingly
nga_hh_panel <- nga_hh_panel %>% 
  mutate(
    treatmentgroup = case_when(
      hhid %in% nontreated ~ 0,
      hhid %in% treated1 ~ 1,
      hhid %in% treated2 ~ 2,
      hhid %in% treated3 ~ 3),
    year = case_when(
      year == 2010 ~ 1,
      year == 2012 ~ 2,
      year == 2015 ~ 3
    )
  )

rm(treated2, treated3, nontreated, treated1)

# Create analysis datasets -----------------------------------------------------

nga_hh_did_df <- nga_hh_panel %>% 
  # Remove mixed treatment households, removes 2,741 rows (20%)
  filter(!is.na(treatmentgroup)) %>% 
  select(year, ea, rural, hhid, treatmentgroup, el_grid,
         all_of(c(hh_nfeoutcomes, vill_covar, hh_covar))) %>% 
  na.omit() %>% 
  # Select rural panel subset, removes 3,222 rows (30%)
  group_by(hhid) %>% 
  filter(rural == 1, n() == 3) %>% 
  ungroup() %>% 
  mutate(across(c("nfe_tot", "nfe_retail", "nfe_manuf",
                   "nfe_wmntot", "nfe_wmnretail", "nfe_wmnmanuf"), ~as.numeric(. > 0))) %>% 
  # Convert long eaid character strings to unique integer
  mutate(eaid = as.numeric(as.factor(ea))) %>% 
  select(year, eaid, hhid, treatmentgroup, el_grid,
         all_of(c(hh_nfeoutcomes, vill_covar, hh_covar))) 

nga_indiv_did_df <- nga_indiv_panel %>% 
  # Convert years to match survey waves as for HH dataset
  mutate(year = case_when(
    year == 2010 ~ 1,
    year == 2012 ~ 2,
    year == 2015 ~ 3)
  ) %>% 
  inner_join(nga_hh_did_df %>% select(hhid, year, treatmentgroup)) %>% 
  select(hhid, indiv, year, treatmentgroup, el_grid, indiv_empoutcomes, vill_covar, indiv_covar) %>% 
  # Convert long eaid character strings to unique integer
  mutate(indiv = as.numeric(as.factor(indiv)))
 
# Set household ID to unique numeric (after joining above)
nga_hh_did_df <- nga_hh_did_df %>% 
  mutate(hhid = as.numeric(as.factor(hhid))) 
  
# Summary table of covariates in baseline year ---------------------------------

a <- nga_indiv_did_df %>% 
  filter(year == min(year)) %>% 
  select(treatmentgroup, indiv_covar) %>% 
  rename_at(vars(indiv_covar), ~indiv_covar_labels) %>% 
  select(-`HH size`) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1"))
  ) %>%
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~ treatmentgroup * (Mean + SD), data = .,
                            output = "data.frame")

nga_hh_did_df %>% 
  filter(year == min(year)) %>% 
  select(treatmentgroup, vill_covar, hh_covar) %>% 
  rename_at(vars(hh_covar), ~hh_covar_labels) %>% 
  rename_at(vars(vill_covar), ~vill_covar_labels) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1"))
  ) %>%
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~ treatmentgroup * (Mean + SD), data = ., 
                            add_rows = a, output = "kableExtra", format = "latex") %>% 
  pack_rows("Household-level dataset", 1, 8) %>% 
  pack_rows("Individual-level dataset", 9, 12) %>% 
  gsub(".(begin|end){table.*}", "", ., perl = TRUE) %>% 
  save_kable(file = here("Manuscript", "Tables", "nga_hh_covs.tex"))

# Summary table of outcome variables simple trends -----------------------------

a <- nga_indiv_did_df %>% 
  select(year, treatmentgroup, indiv_empoutcomes) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1")),
         year = factor(year)
  ) %>%
  rename_at(vars(indiv_empoutcomes), ~indiv_empoutcomes_labels) %>%
  select(indiv_empoutcomes_labels,
         Year = year, Group = treatmentgroup) %>% 
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~  Group * Year * Mean, data = .,
                            output = "data.frame")

nga_hh_did_df %>% 
  select(year, treatmentgroup, hh_nfeoutcomes) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1")),
         year = factor(year)
  ) %>%
  rename_at(vars(hh_nfeoutcomes), ~hh_nfeoutcomes_labels) %>%
  select(hh_nfeoutcomes_labels, Year = year, Group = treatmentgroup) %>% 
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~  Group * Year * Mean, data = .,
                            add_rows = a, output = "kableExtra", format = "latex") %>% 
  pack_rows("Household-level dataset", 1, 6) %>% 
  pack_rows("Individual-level dataset", 7, 9) %>% 
  gsub(".(begin|end){table.*}", "", ., perl = TRUE) %>% 
  save_kable(file = here("Manuscript", "Tables", "nga_hh_outcomes.tex"))

# Propensity weighted staggered DiD regression analysis ------------------------

# Function for staggered DiD, aggregated to group-level

staggdid_hh_group <- function(varvec) {
  
  resultstbl_group <- tibble(type = NA_character_,
                       term   = NA_character_,
                       estimate  = NA_real_,
                       std.error  = NA_real_,
                       conf.low  = NA_real_,
                       conf.high  = NA_real_,
                       point.conf.low  = NA_real_,
                       point.conf.high  = NA_real_,
                       covs = NA_character_, 
                       model = NA_character_, 
                       .rows = 0
  )
  
  for (i in varvec) {
    
    print(paste0("Running model: ", {{i}}, " with no covariates"))

    model1 <- att_gt(yname =  {{i}},
                     tname = "year",
                     idname = "hhid",
                     gname = "treatmentgroup",
                     clustervars = "hhid",
                     data = nga_hh_did_df,
                     bstrap = TRUE, cband = TRUE)

    print(paste0("Running model: ", {{i}}, " with village covariates"))
    
    model2 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "hhid",
                     clustervars = "hhid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(vill_covar, collapse = " + "))),
                     data = nga_hh_did_df,
                     bstrap = TRUE, cband = TRUE)

    print(paste0("Running model: ", {{i}}, " with village and household covariates"))

    model3 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "hhid",
                     clustervars = "hhid", 
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(c(vill_covar, hh_covar), collapse = " + "))),
                     data = nga_hh_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Estimating aggregated results for model: ", {{i}}))
  
    resultstbl_group <-
      list(resultstbl_group,
           aggte(model1, type = "group") %>% did::tidy() %>% mutate(covs = "Stagg.DiD - no covs.", model = {{i}}),
           aggte(model2, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill covs.", model = {{i}}),
           aggte(model3, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill & hh covs.", model = {{i}})) %>% 
      dplyr::bind_rows()
    
    print(paste0("Done with model: ", {{i}}))

  }
  
  return(resultstbl_group)
  
}

staggdid_indiv_group <- function(varvec) {
  
  resultstbl_group <- tibble(type = NA_character_,
                             term   = NA_character_,
                             estimate  = NA_real_,
                             std.error  = NA_real_,
                             conf.low  = NA_real_,
                             conf.high  = NA_real_,
                             point.conf.low  = NA_real_,
                             point.conf.high  = NA_real_,
                             covs = NA_character_, 
                             model = NA_character_, 
                             .rows = 0
  )
  
  for (i in varvec) {
    
    print(paste0("Running model: ", {{i}}, " with no covariates"))
    
    model1 <- att_gt(yname =  {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup", 
                     data = nga_indiv_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village covariates"))
    
    model2 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(vill_covar, collapse = " + "))),
                     data = nga_indiv_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village and household covariates"))
    
    model3 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(c(vill_covar, indiv_covar), collapse = " + "))),
                     data = nga_indiv_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Estimating aggregated results for model: ", {{i}}))
    
    resultstbl_group <-
      list(resultstbl_group,
           aggte(model1, type = "group") %>% did::tidy() %>% mutate(covs = "Stagg.DiD - no covs.", model = {{i}}),
           aggte(model2, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill covs.", model = {{i}}),
           aggte(model3, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill & indiv. covs.", model = {{i}})) %>% 
      dplyr::bind_rows()
    
    print(paste0("Done with model: ", {{i}}))
    
  }
  
  return(resultstbl_group)
  
}

# Run model for nfe outcomes

hh_nfeoutcomes_group_tbl <- staggdid_hh_group(all_of(c(hh_nfeoutcomes)))

# Run model for employment outcomes

indiv_empoutcomes_group_tbl <- staggdid_indiv_group(all_of(indiv_empoutcomes))

# Prepare appendix estimation plots --------------------------------------------

# Plot results for nfe outcomes
hh_nfeoutcomesgroup_plot <- hh_nfeoutcomes_group_tbl %>%
  select(model, term, estimate, conf.low, conf.high, covs) %>% 
  filter(model %in% hh_nfeoutcomes) %>% 
  mutate(model = factor(model, levels = c(hh_nfeoutcomes), 
                        labels = c(hh_nfeoutcomes_labels))) %>%
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high, colour = covs, shape = covs)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.2) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  coord_cartesian(ylim = c(-0.2,0.2)) +
  scale_y_continuous(breaks = scales::pretty_breaks()) +
  theme_bw() +
  facet_wrap(~model, ncol = 3) +
  labs(x = "Treatment Group",
       y = "ATT (Percentage point change in likelihood)",
       colour = "Specification",
       shape = "Specification")

wrap_plots(hh_nfeoutcomesgroup_plot) + 
  plot_layout(guides = "collect") +
  plot_annotation(subtitle = "Estimated percentage point change in NFE likelihood.") & 
  theme(legend.position = "bottom")

ggsave(here("Manuscript", "Figures", "Annex", "nga_nfedid_hh_detail.png"),
       width = 10, height = 6)

# Plot results for employment outcomes
indiv_empoutcomes_group_lot <- indiv_empoutcomes_group_tbl %>% 
  select(model, term, estimate, conf.low, conf.high, covs) %>% 

  mutate(model = factor(model, levels = indiv_empoutcomes, labels = indiv_empoutcomes_labels)) %>%
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high, colour = covs, shape = covs)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.2) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  coord_cartesian(ylim = c(-0.2,0.2)) +
  scale_y_continuous(breaks = scales::pretty_breaks()) +
  theme_bw() +
  facet_wrap(~model, ncol = 3) +
  labs(x = "Treatment Group",
       y = "ATT (percentage point change in likelihood of employment)",
       colour = "Specification",
       shape = "Specification")

wrap_plots(indiv_empoutcomes_group_lot) + 
  plot_layout(guides = "collect") +
  plot_annotation(subtitle = "Estimated percentage point change in employment, by treatment cohort and averaged.") & 
  theme(legend.position = "bottom")

ggsave(here("Manuscript", "Figures", "Annex", "nga_empdid_indiv_detail.png"),
       width = 10, height = 6)

# NIGERIA - COMMUNITY TREATMENT (SURVEY) ---------------------------------------

# LOAD DATA 
nga_comm_panel <- readRDS(here::here("Data", "nigeria_hhpanel.rds"))
nga_indivcomm_panel <- readRDS(here::here("Data", "nigeria_indivpanel.rds"))

# Prepare analysis dataset -----------------------------------------------------

# Define treatment as those without grid in wave 1, and grid in waves 2 and 3

a <- nga_comm_panel %>% 
  filter(year == 2010, com_grid == 0) %>% 
  pull(hhid)

b <- nga_comm_panel %>% 
  filter(year == 2012, com_grid == 1) %>% 
  pull(hhid)

c <- nga_comm_panel %>% 
  filter(year == 2015, com_grid == 1) %>% 
  pull(hhid)

treated2 <- intersect(intersect(a,b),c)
treated3 <- setdiff(intersect(a,c),intersect(a,b))
rm(a,b,c)

a <- nga_comm_panel %>% 
  filter(year == 2010, com_grid == 0) %>% 
  pull(hhid)

b <- nga_comm_panel %>% 
  filter(year == 2012, com_grid == 0) %>% 
  pull(hhid)

c <- nga_comm_panel %>% 
  filter(year == 2015, com_grid == 0) %>% 
  pull(hhid)

nontreated <- intersect(intersect(a,b),c)
rm(a,b,c)

a <- nga_comm_panel %>% 
  filter(year == 2010, com_grid == 1) %>% 
  pull(hhid)

b <- nga_comm_panel %>% 
  filter(year == 2012, com_grid == 1) %>% 
  pull(hhid)

c <- nga_comm_panel %>% 
  filter(year == 2015, com_grid == 1) %>% 
  pull(hhid)

treated1 <- intersect(intersect(a,b),c)
rm(a,b,c)

# Set treatment group as uniform survey wave periods and change survey wave
# years accordingly
nga_comm_panel <- nga_comm_panel %>% 
  mutate(treatmentgroup = case_when(
    hhid %in% nontreated ~ 0,
    hhid %in% treated1 ~ 1,
    hhid %in% treated2 ~ 2,
    hhid %in% treated3 ~ 3),
    year = case_when(
      year == 2010 ~ 1,
      year == 2012 ~ 2,
      year == 2015 ~ 3
    )
  )
  
rm(treated2, treated3, nontreated, treated1)

# Create analysis dataset ------------------------------------------------------

nga_comm_did_df <- nga_comm_panel %>% 
  # Remove mixed treatment households, removes 2,741 rows (20%)
  filter(!is.na(treatmentgroup)) %>% 
  select(year, ea, rural, hhid, treatmentgroup, el_grid,
         all_of(c(hh_nfeoutcomes, vill_covar, hh_covar))) %>% 
  na.omit() %>% 
  # Select rural panel subset, removes 3,222 rows (30%)
  group_by(hhid) %>% 
  filter(rural == 1, n() == 3) %>% 
  ungroup() %>% 
  mutate(across(c("nfe_tot", "nfe_retail", "nfe_manuf",
                  "nfe_wmntot", "nfe_wmnretail", "nfe_wmnmanuf"), ~as.numeric(. > 0))) %>% 
  # Convert long eaid character strings to unique integer
  mutate(eaid = as.numeric(as.factor(ea))) %>% 
  select(year, eaid, hhid, treatmentgroup, el_grid,
         all_of(c(hh_nfeoutcomes, vill_covar, hh_covar)))

nga_indivcomm_did_df <- nga_indivcomm_panel %>%
  # Convert years to match survey waves as for HH dataset
  mutate(year = case_when(
    year == 2010 ~ 1,
    year == 2012 ~ 2,
    year == 2015 ~ 3
  )) %>% 
  inner_join(nga_comm_did_df %>% select(hhid, eaid, year, treatmentgroup)) %>% 
  select(eaid, hhid, indiv, year, treatmentgroup, el_grid, indiv_empoutcomes, vill_covar, indiv_covar) %>% 
  # Convert long eaid/hhid character strings to unique integer
  mutate(indiv = as.numeric(as.factor(as.character(indiv))))

# Set household ID to unique numeric (after joining above)
nga_comm_did_df <- nga_comm_did_df %>% 
  mutate(hhid = as.numeric(as.factor(as.character(hhid)))) 

# Summary table of covariates in baseline year ---------------------------------

a <- nga_indivcomm_did_df %>% 
  filter(year == min(year)) %>% 
  select(treatmentgroup, indiv_covar) %>% 
  rename_at(vars(indiv_covar), ~indiv_covar_labels) %>% 
  select(-`HH size`) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1"))
  ) %>%
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~ treatmentgroup * (Mean + SD), data = .,
                            output = "data.frame")

nga_comm_did_df %>% 
  filter(year == min(year)) %>% 
  select(treatmentgroup, vill_covar, hh_covar) %>% 
  rename_at(vars(hh_covar), ~hh_covar_labels) %>% 
  rename_at(vars(vill_covar), ~vill_covar_labels) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1"))
  ) %>%
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~ treatmentgroup * (Mean + SD), data = ., 
                            add_rows = a, output = "kableExtra", format = "latex") %>% 
  pack_rows("Household-level dataset", 1, 8) %>% 
  pack_rows("Individual-level dataset",9, 12) %>% 
  gsub(".(begin|end){table.*}", "", ., perl = TRUE) %>% 
  save_kable(file = here("Manuscript", "Tables", "nga_comm_covs.tex"))

# Summary table of outcome variables simple trends -----------------------------

a <- nga_indiv_did_df %>% 
  select(year, treatmentgroup, indiv_empoutcomes) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1")),
         year = factor(year)
  ) %>%
  rename_at(vars(indiv_empoutcomes), ~indiv_empoutcomes_labels) %>%
  select(indiv_empoutcomes_labels,
         Year = year, Group = treatmentgroup) %>% 
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~  Group * Mean * Year, data = .,
                            output = "data.frame")

nga_comm_did_df %>% 
  select(year, treatmentgroup, hh_nfeoutcomes) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1")),
         year = factor(year)
  ) %>%
  rename_at(vars(hh_nfeoutcomes), ~hh_nfeoutcomes_labels) %>%
  select(hh_nfeoutcomes_labels, Year = year, Group = treatmentgroup) %>% 
  as.data.frame() %>% 
  modelsummary::datasummary(All(.) ~  Group * Mean * Year, data = .,
                            add_rows = a, output = "kableExtra", format = "latex") %>% 
  pack_rows("Household-level dataset", 1,6) %>% 
  pack_rows("Individual-level dataset",7, 9) %>% 
  gsub(".(begin|end){table.*}", "", ., perl = TRUE) %>% 
  save_kable(file = here("Manuscript", "Tables", "nga_comm_outcomes.tex"))

# Propensity weighted staggered DiD regression analysis ------------------------

# Function for staggered DiD, aggregated to group level

staggdid_comm_group <- function(varvec) {
  
  resultstbl_group <- tibble(type = NA_character_,
                             term   = NA_character_,
                             group  = NA_character_,
                             estimate  = NA_real_,
                             std.error  = NA_real_,
                             conf.low  = NA_real_,
                             conf.high  = NA_real_,
                             point.conf.low  = NA_real_,
                             point.conf.high  = NA_real_,
                             covs = NA_character_, 
                             model = NA_character_, 
                             .rows = 0
  )
  
  for (i in varvec) {
    
    print(paste0("Running model: ", {{i}}, " with no covariates"))
    
    model1 <- att_gt(yname =  {{i}},
                     tname = "year",
                     idname = "hhid",
                     clustervars = "eaid",
                     gname = "treatmentgroup",
                     data = nga_comm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village covariates"))
    
    model2 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "hhid",
                     clustervars = "eaid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(vill_covar, collapse = " + "))),
                     data = nga_comm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village and household covariates"))
    
    model3 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "hhid",
                     clustervars = "eaid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(c(vill_covar, hh_covar), collapse = " + "))),
                     data = nga_comm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Estimating aggregated results for model: ", {{i}}))
    
    resultstbl_group <-
      list(resultstbl_group,
           aggte(model1, type = "group") %>% did::tidy() %>% mutate(covs = "Stagg.DiD - no covs.", model = {{i}}),
           aggte(model2, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill covs.", model = {{i}}),
           aggte(model3, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill & hh covs.", model = {{i}})) %>% 
      dplyr::bind_rows()
    
    print(paste0("Done with model: ", {{i}}))
    
  }
  
  return(resultstbl_group)
  
}

staggdid_indivcomm_group <- function(varvec) {
  
  resultstbl_group <- tibble(type = NA_character_,
                             term   = NA_character_,
                             group  = NA_character_,
                             estimate  = NA_real_,
                             std.error  = NA_real_,
                             conf.low  = NA_real_,
                             conf.high  = NA_real_,
                             point.conf.low  = NA_real_,
                             point.conf.high  = NA_real_,
                             covs = NA_character_, 
                             model = NA_character_, 
                             .rows = 0
  )
  
  for (i in varvec) {
    
    print(paste0("Running model: ", {{i}}, " with no covariates"))
    
    model1 <- att_gt(yname =  {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup", 
                     data = nga_indivcomm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village covariates"))
    
    model2 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(vill_covar, collapse = " + "))),
                     data = nga_indivcomm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Running model: ", {{i}}, " with village and household covariates"))
    
    model3 <- att_gt(yname = {{i}},
                     tname = "year",
                     idname = "indiv",
                     clustervars = "hhid",
                     gname = "treatmentgroup",
                     xformla = as.formula(paste0("~",paste(c(vill_covar, indiv_covar), collapse = " + "))),
                     data = nga_indivcomm_did_df,
                     bstrap = TRUE, cband = TRUE)
    
    print(paste0("Estimating aggregated results for model: ", {{i}}))
    
    resultstbl_group <-
      list(resultstbl_group,
           aggte(model1, type = "group") %>% did::tidy() %>% mutate(covs = "Stagg.DiD - no covs.", model = {{i}}),
           aggte(model2, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill covs.", model = {{i}}),
           aggte(model3, type = "group") %>% did::tidy() %>% mutate(covs = "DRDiD - vill & indiv. covs.", model = {{i}})) %>% 
      dplyr::bind_rows()
    
    print(paste0("Done with model: ", {{i}}))
    
  }
  
  return(resultstbl_group)
  
}


# Run model for nfe outcomes

comm_nfeoutcomes_group_tbl <- staggdid_comm_group(all_of(c(hh_nfeoutcomes)))

# Run model for employment outcomes

indivcomm_empoutcomes_group_tbl <- staggdid_indivcomm_group(all_of(indiv_empoutcomes))

# Prepare appendix estimation plots --------------------------------------------

# Plot results for nfe outcomes
comm_nfeoutcomesgroup_plot <- comm_nfeoutcomes_group_tbl %>%
  select(model, term, estimate, conf.low, conf.high, covs) %>% 
  filter(model %in% hh_nfeoutcomes) %>% 
  mutate(model = factor(model, levels = c(hh_nfeoutcomes), 
                        labels = c(hh_nfeoutcomes_labels))) %>%
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high, colour = covs, shape = covs)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.2) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  coord_cartesian(ylim = c(-0.2,0.2)) +
  scale_y_continuous(breaks = scales::pretty_breaks()) +
  theme_bw() +
  facet_wrap(~model, ncol = 3) +
  labs(x = "Treatment Group",
       y = "ATT (Percentage point change in likelihood)",
       colour = "Specification",
       shape = "Specification")

wrap_plots(comm_nfeoutcomesgroup_plot) + 
  plot_layout(guides = "collect") +
  plot_annotation(subtitle = "Estimated percentage point change in NFE likelihood.") & 
  theme(legend.position = "bottom")

ggsave(here("Manuscript", "Figures", "Annex", "nga_nfedid_comm_detail.png"),
       width = 10, height = 6)

# Plot results for employment outcomes
indivcomm_empoutcomes_group_lot <- indivcomm_empoutcomes_group_tbl %>% 
  select(model, term, estimate, conf.low, conf.high, covs) %>% 
  
  mutate(model = factor(model, levels = indiv_empoutcomes, labels = indiv_empoutcomes_labels)) %>%
  ggplot(aes(x = term, y = estimate, ymin = conf.low, ymax = conf.high, colour = covs, shape = covs)) +
  geom_point(position = position_dodge(width = 0.2)) +
  geom_errorbar(position = position_dodge(width = 0.2), width = 0.2) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.5) +
  coord_cartesian(ylim = c(-0.2,0.2)) +
  scale_y_continuous(breaks = scales::pretty_breaks()) +
  theme_bw() +
  facet_wrap(~model, ncol = 3) +
  labs(x = "Treatment Group",
       y = "ATT (percentage point change in likelihood of employment)",
       colour = "Specification",
       shape = "Specification")

wrap_plots(indivcomm_empoutcomes_group_lot) + 
  plot_layout(guides = "collect") +
  plot_annotation(subtitle = "Estimated percentage point change in employment, by treatment cohort and averaged.") & 
  theme(legend.position = "bottom")

ggsave(here("Manuscript", "Figures", "Annex", "nga_empdid_indivcomm_detail.png"),
       width = 10, height = 6)

# Create treatment group summary table -----------------------------------------

nga_hh_did_df_summary <- nga_hh_did_df %>% 
  filter(year == "3") %>% 
  dplyr::count(treatmentgroup, name = "HHs (HH treat)") 

nga_indiv_did_df_summary <- nga_indiv_did_df %>% 
  filter(year == "3") %>% 
  dplyr::count(treatmentgroup, name = "Indivs. (HH treat)") 

nga_comm_did_df_summary <- nga_comm_did_df %>% 
  filter(year == "3") %>% 
  dplyr::count(treatmentgroup, name = "HHs (Comm. treat)")  

nga_indivcomm_did_df_summary <- nga_indivcomm_did_df %>% 
  filter(year == "3") %>% 
  dplyr::count(treatmentgroup, name = "Indivs. (Comm. treat)")  

nga_did_df_summary <- list(nga_hh_did_df_summary, nga_indiv_did_df_summary,
                           nga_comm_did_df_summary, nga_indivcomm_did_df_summary) %>% 
  reduce(left_join) %>% 
  mutate(treatmentgroup = factor(treatmentgroup, 
                                 levels = c(0, 3, 2, 1),
                                 labels = c("Never Treat", 
                                            "Treat Wave 3", "Treat Wave 2",
                                            "Treat Wave 1 (always treated)"))
  ) %>% 
  arrange(treatmentgroup) %>% 
  pivot_longer(-c(treatmentgroup)) %>% 
  pivot_wider(names_from = treatmentgroup, values_from = value) %>% 
  rename(`Analysis level` = name) 

nga_did_df_summary %>% 
  kbl(., booktabs = T, align = c("l", rep("r",4)), format = "latex") %>% 
  save_kable(file = here("Manuscript", "Tables", "nga_summarystats.tex"), latex_header_includes = F)

# Export results for visualisation ---------------------------------------------

rbind(hh_nfeoutcomes_group_tbl %>% mutate(treatment = "Household"), 
      comm_nfeoutcomes_group_tbl %>% mutate(treatment = "Community")) %>% 
  write_csv(here("Manuscript", "Tables", "Annex", "nga_nfeoutcomes.csv"))

rbind(indiv_empoutcomes_group_tbl %>% mutate(treatment = "Household"), 
      indivcomm_empoutcomes_group_tbl %>% mutate(treatment = "Community"))  %>% 
  write_csv(here("Manuscript", "Tables", "Annex", "nga_empoutcomes.csv"))

